# coding=utf-8
from locale import *
import os
import sys
import datetime
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import  requests
import threading
import time
from bs4 import BeautifulSoup

# Base URL for a topic page; the numeric topic id is appended to it.
site_base_url = 'https://forums.huaren.us/showtopic.aspx?topicid='
# Starting topic id; consecutive ids are visited from this one onward.
input_topicid = input("Input topicid:").strip()
topicid = int(input_topicid)
# How many topics to fetch (controls the crawl loop length).
input_numoftopic = input("Input number of topics to crawler:").strip()
print("Number of topics to crawler is:", input_numoftopic)
numoftopic = int(input_numoftopic)

# Output file name encodes the starting id and the requested count.
file_name = f"huarenus_{input_topicid}_{input_numoftopic}.csv"
print("File Name is:", file_name)


def run_crawler(topicid):
    """Crawl ``numoftopic`` consecutive forum topics starting at ``topicid``.

    For each topic page, every poster username found in a
    ``<div class="poster">`` element is appended (one per line) to the
    module-level ``file_name`` CSV file.

    :param topicid: integer id of the first topic to fetch; subsequent
        iterations fetch ``topicid + 1``, ``topicid + 2``, ...
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_argument("--test-type")
    # Disable image loading so pages fetch faster.
    prefs = {"profile.managed_default_content_settings.images": 2}
    options.add_experimental_option("prefs", prefs)
    # NOTE(review): `chrome_options=` is deprecated in newer Selenium
    # releases in favor of `options=` — confirm the installed version
    # before switching.
    driver = webdriver.Chrome(chrome_options=options)

    try:
        # BUG FIX: the original `range(1, numoftopic)` iterated only
        # numoftopic-1 times; iterate exactly `numoftopic` times as the
        # user requested.
        for offset in range(numoftopic):
            current_topic_url = site_base_url + str(topicid + offset)
            driver.get(current_topic_url)
            print('CURRENT TOPIC URL:' + driver.current_url)
            # Parse the rendered page.
            soup_topic = BeautifulSoup(driver.page_source, "html.parser")
            # Each poster's name lives in a <div class="poster"> element.
            all_usernames = soup_topic.select('div.poster')
            # Append mode so results accumulate across topics/runs; the
            # context manager guarantees the handle is closed even if a
            # write fails.
            with open(file_name, 'a', encoding='utf8') as out:
                for item_username in all_usernames:
                    username = item_username.get_text().strip()
                    print(username + '\n')
                    out.write(username + '\n')
            # Be polite to the server: pause 1 second between topics.
            time.sleep(1)
    finally:
        # Always release the browser process, even on failure
        # (the original leaked the WebDriver on any exception).
        driver.quit()


run_crawler(topicid)






